library(ngsReports)
library(edgeR)
library(magrittr)
library(tidyverse)
library(ggrepel)
theme_set(theme_bw())

Load all FastQC summaries

rawFqc <- list.files("../0_rawData/FastQC/", pattern = "zip$", full.names = TRUE) %>%
    getFastqcData()
trimFqc <- list.files("../1_trimmedData/FastQC/", pattern = "zip$", full.names = TRUE) %>%
    getFastqcData()
alnFqc <- list.files("../2_alignedData/FastQC/", pattern = "zip$", full.names = TRUE) %>%
    getFastqcData()

Read Totals

rt <- list(
    readTotals(rawFqc) %>% mutate(Stage = "Raw"),
    readTotals(trimFqc) %>% mutate(Stage = "Trimmed"),
    readTotals(alnFqc) %>% mutate(Stage = "Aligned")
) %>%
    bind_rows() %>%
    mutate(Sample = str_remove_all(Filename, "(_R1.fq.gz|Aligned.sortedByCoord.out.bam)"),
                 Sample = factor(Sample, levels = str_sort(unique(Sample), numeric = TRUE)),
                 Stage = factor(Stage, levels = c("Raw", "Trimmed", "Aligned")))

Summary of library sizes after each step. The slight increase after alignments indicates a low level of multiple alignments

Gene-level Counts

Gene-level counts were loaded without filtering and an MDS plot generated in order to check whether replicate libraries grouped correctly.

counts <- read_tsv("../2_alignedData/featureCounts/genes.out") %>%
    set_names(basename(colnames(.))) %>%
    set_names(str_remove_all(colnames(.), "Aligned.sortedByCoord.out.bam"))
dge <- counts %>% 
    as.data.frame() %>%
    column_to_rownames("Geneid") %>%
    DGEList() %>%
    calcNormFactors()
dge$samples %<>%
    rownames_to_column("library") %>%
    mutate(Genotype = str_extract_all(library, "(FAD|FS|__)"),
                 Genotype = str_replace_all(Genotype, "__", "WT"),
                 Genotype = factor(Genotype, levels = c("WT", "FS", "FAD")),
                 Sample = str_replace(library, "__-", "_WT"),
                 Sample = ifelse(str_count(Sample, "_") == 3, Sample, paste0(Sample, "_1")),
                 Replicate = str_extract(Sample, "[12]$"),
                 Sample = str_remove(Sample, "_[12]$"),
                 group = as.integer(Genotype)) %>%
    column_to_rownames("library")
mds <- plotMDS(dge, plot = FALSE) %>%
    extract2("cmdscale.out") %>%
    set_colnames(paste0("Dim", 1:2)) %>%
    as.data.frame() %>%
    rownames_to_column("Library") %>%
    as_tibble() 
*Plot of replicate libraries showing good concordance between replicates. These will be combined and low-expressed genes removed for the DE analysis*

Plot of replicate libraries showing good concordance between replicates. These will be combined and low-expressed genes removed for the DE analysis